library(ggplot2)
library(lubridate)
library(gridExtra)

## Note: due to the data terms of use, we cannot share the CrowdTangle data.
## The code below produces the reported figures, with placeholders to read in the associated data.

#  Race Topics --------------------------------------------------
# Load the export of race-related posts. The path is a placeholder because the
# underlying data cannot be shared. The next two calls print quick diagnostics.
ct.raceposts <- read.csv("")
names(ct.raceposts)
table(ct.raceposts$Post.Created)

# Derive the calendar date of each post from Post.Created (presumably a
# timestamp string carrying an "EDT" label -- confirm against the export).
# The label is stripped first and the cleaned string is what gets parsed;
# as.Date() reads the leading date and ignores any trailing characters, so the
# time of day is dropped.
ct.raceposts$date <- as.Date(
  gsub(
    pattern = "EDT", replacement = "", fixed = TRUE,
    x = as.character(ct.raceposts$Post.Created)
  )
)
ct.raceposts$month <- format(ct.raceposts$date, "%m")
ct.raceposts$year <- format(ct.raceposts$date, "%Y")

# date2 is the first day of the post's month. It is built from the post's own
# year so that posts from another year are not folded into 2020.
ct.raceposts$date2 <- as.Date(
  paste0(ct.raceposts$year, "-", ct.raceposts$month, "-01")
)

# Diagnostics: daily counts, monthly counts, and the day-by-month cross-tab.
table(ct.raceposts$date)
table(ct.raceposts$date2)
table(ct.raceposts$date, ct.raceposts$date2)

# Bar chart of the number of posts per day. The date window and the 0-100
# y range are the same as those used for the sleep-topics plot.
# Note: limits set through ylim() drop out-of-range values rather than
# clipping them, so a day with more than 100 posts would lose its bar.
rrplot1 <- ggplot(ct.raceposts, aes(x = date)) +
  stat_count(position = position_dodge()) +
  scale_x_date(
    date_breaks = "1 months", date_labels = "%b %Y",
    limits = as.Date(c("2020-02-24", "2020-08-27"))
  ) +
  ylim(0, 100) +
  theme_bw() +
  labs(x = "", y = "Count of Mentions") +
  ggtitle("Race Topics") +
  theme(
    axis.text.x = element_text(size = 16, angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(size = 24, hjust = 0.5),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )



#  Placebo Sleep Topics --------------------------------------------------
# Load the export of sleep-related (placebo) posts. The path is a placeholder
# because the underlying data cannot be shared. The next two calls print quick
# diagnostics.
ct.sleepposts <- read.csv("")
names(ct.sleepposts)
table(ct.sleepposts$Post.Created)

# Derive the calendar date of each post from Post.Created (presumably a
# timestamp string carrying an "EDT" label -- confirm against the export).
# The label is stripped first and the cleaned string is what gets parsed;
# as.Date() reads the leading date and ignores any trailing characters, so the
# time of day is dropped.
ct.sleepposts$date <- as.Date(
  gsub(
    pattern = "EDT", replacement = "", fixed = TRUE,
    x = as.character(ct.sleepposts$Post.Created)
  )
)
ct.sleepposts$month <- format(ct.sleepposts$date, "%m")
ct.sleepposts$year <- format(ct.sleepposts$date, "%Y")

# date2 is the first day of the post's month. It is built from the post's own
# year so that posts from another year are not folded into 2020.
ct.sleepposts$date2 <- as.Date(
  paste0(ct.sleepposts$year, "-", ct.sleepposts$month, "-01")
)

# Diagnostics: daily counts, monthly counts, and the day-by-month cross-tab.
table(ct.sleepposts$date)
table(ct.sleepposts$date2)
table(ct.sleepposts$date, ct.sleepposts$date2)

# Bar chart of the number of posts per day. The date window and the 0-100
# y range are the same as those used for the race-topics plot.
# Note: limits set through ylim() drop out-of-range values rather than
# clipping them, so a day with more than 100 posts would lose its bar.
rrplot2 <- ggplot(ct.sleepposts, aes(x = date)) +
  stat_count(position = position_dodge()) +
  scale_x_date(
    date_breaks = "1 months", date_labels = "%b %Y",
    limits = as.Date(c("2020-02-24", "2020-08-27"))
  ) +
  ylim(0, 100) +
  theme_bw() +
  labs(x = "", y = "Count of Mentions") +
  ggtitle("Sleep Topics") +
  theme(
    axis.text.x = element_text(size = 16, angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(size = 24, hjust = 0.5),
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )


# * Combine --------------------------------------------------------------

# Draw the race and sleep panels side by side in a single row.
# Uncomment the pdf()/dev.off() pair to write the figure to fig2.pdf instead
# of the active graphics device.
# pdf(file = "fig2.pdf", width = 10, height = 5)
grid.arrange(grobs = list(rrplot1, rrplot2), nrow = 1)
# dev.off()



# Data Descriptives -------------------------------------------------------

# Split the race sample at 2020-05-24: posts dated up to 2020-05-23 go to
# pre.rr, posts dated 2020-05-24 or later go to post.rr. Rows whose date is NA
# are dropped by subset() and so land in neither group.
# NOTE(review): this split was described as "before Floyd's murder and day of
# or after", but George Floyd was killed on 2020-05-25, which would put
# 2020-05-24 in the pre period -- confirm the cutoff date is the intended one.
pre.rr <- subset(ct.raceposts, date < as.Date("2020-05-24"))
table(pre.rr$date)

post.rr <- subset(ct.raceposts, date > as.Date("2020-05-23"))
table(post.rr$date)

nrow(pre.rr) ## 128 posts
nrow(post.rr) ## 1540 posts

# Ratio of post-period to pre-period posts, computed from the data itself so
# it cannot drift from the counts above.
nrow(post.rr) / nrow(pre.rr) ## 12.03125 times as many posts after


# Partition the sleep (placebo) sample at the same boundary used for the race
# sample: dates before 2020-05-24 versus dates after 2020-05-23.
pre.sleep <- subset(ct.sleepposts, date < as.Date("2020-05-24"))
table(pre.sleep$date)

post.sleep <- subset(ct.sleepposts, date > as.Date("2020-05-23"))
table(post.sleep$date)

# Number of posts in each period.
nrow(pre.sleep) ## 1018 posts
nrow(post.sleep) ## 1017 posts
# The two periods differ by a single post.

## April comparisons

# Race posts dated 2020-04-01 through 2020-04-30, tabulated per day.
april.race <- subset(
  ct.raceposts,
  date > as.Date("2020-03-31") & date < as.Date("2020-05-01")
)
a <- table(april.race[["date"]])
b <- as.data.frame(a)

# Total posts divided by the 30 days of April.
sum(b[["Freq"]]) / 30 # 1.13 posts per day average on race in April

# Sleep posts over the same window.
# Note: the variable `c` shares its name with base::c(); calls to c() still
# resolve to the function.
april.sleep <- subset(
  ct.sleepposts,
  date > as.Date("2020-03-31") & date < as.Date("2020-05-01")
)
c <- table(april.sleep[["date"]])
d <- as.data.frame(c)

sum(d[["Freq"]]) / 30 # 11.3 posts per day average on sleep in April



## June comparisons

# Race posts dated 2020-06-01 through 2020-06-30, tabulated per day.
june.race <- subset(
  ct.raceposts,
  date > as.Date("2020-05-31") & date < as.Date("2020-07-01")
)
e <- table(june.race[["date"]])
e # range of 8 posts to 83 posts per day
f <- as.data.frame(e)

# Total posts divided by the 30 days of June.
sum(f[["Freq"]]) / 30 # 32.6 posts per day average on race in June

# Sleep posts over the same window.
june.sleep <- subset(
  ct.sleepposts,
  date > as.Date("2020-05-31") & date < as.Date("2020-07-01")
)
g <- table(june.sleep[["date"]])
h <- as.data.frame(g)

sum(h[["Freq"]]) / 30 # 10.8 posts per day average on sleep in June







